library(magrittr)
package 㤼㸱magrittr㤼㸲 was built under R version 4.0.3
library(dplyr)
package 㤼㸱dplyr㤼㸲 was built under R version 4.0.3
Attaching package: 㤼㸱dplyr㤼㸲
The following objects are masked from 㤼㸱package:stats㤼㸲:
filter, lag
The following objects are masked from 㤼㸱package:base㤼㸲:
intersect, setdiff, setequal, union
library(ggcorrplot)
package 㤼㸱ggcorrplot㤼㸲 was built under R version 4.0.5Loading required package: ggplot2
package 㤼㸱ggplot2㤼㸲 was built under R version 4.0.5Need help? Try Stackoverflow: https://stackoverflow.com/tags/ggplot2
library(ggpubr)
package 㤼㸱ggpubr㤼㸲 was built under R version 4.0.3Registered S3 method overwritten by 'data.table':
method from
print.data.table
library(patchwork)
package 㤼㸱patchwork㤼㸲 was built under R version 4.0.5
# library("here")
# library("bookdown")
# library("downloadthis")
library(vegan)
package 㤼㸱vegan㤼㸲 was built under R version 4.0.5Loading required package: permute
Loading required package: lattice
This is vegan 2.5-7
library(plyr)
---------------------------------------------------------------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
---------------------------------------------------------------------------------------------------------------------------------------
Attaching package: 㤼㸱plyr㤼㸲
The following object is masked from 㤼㸱package:ggpubr㤼㸲:
mutate
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
library(e1071)
package 㤼㸱e1071㤼㸲 was built under R version 4.0.3
library(tidyverse)
package 㤼㸱tidyverse㤼㸲 was built under R version 4.0.3Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages ------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
√ tibble 3.0.4 √ purrr 0.3.4
√ tidyr 1.1.2 √ stringr 1.4.0
√ readr 1.4.0 √ forcats 0.5.0
package 㤼㸱tibble㤼㸲 was built under R version 4.0.3package 㤼㸱tidyr㤼㸲 was built under R version 4.0.3package 㤼㸱readr㤼㸲 was built under R version 4.0.3package 㤼㸱purrr㤼㸲 was built under R version 4.0.5-- Conflicts --------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x plyr::arrange() masks dplyr::arrange()
x purrr::compact() masks plyr::compact()
x plyr::count() masks dplyr::count()
x tidyr::extract() masks magrittr::extract()
x plyr::failwith() masks dplyr::failwith()
x dplyr::filter() masks stats::filter()
x plyr::id() masks dplyr::id()
x dplyr::lag() masks stats::lag()
x plyr::mutate() masks ggpubr::mutate(), dplyr::mutate()
x plyr::rename() masks dplyr::rename()
x purrr::set_names() masks magrittr::set_names()
x plyr::summarise() masks dplyr::summarise()
x plyr::summarize() masks dplyr::summarize()
library(viridis)
Loading required package: viridisLite
library(GGally)
package 㤼㸱GGally㤼㸲 was built under R version 4.0.5Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
library(ggrepel)
package 㤼㸱ggrepel㤼㸲 was built under R version 4.0.3
library(ggordiplots)
Loading required package: glue
Attaching package: 㤼㸱glue㤼㸲
The following object is masked from 㤼㸱package:dplyr㤼㸲:
collapse
library(readr)
library(RColorBrewer)
package 㤼㸱RColorBrewer㤼㸲 was built under R version 4.0.5
library(oce)
package 㤼㸱oce㤼㸲 was built under R version 4.0.5Loading required package: gsw
package 㤼㸱gsw㤼㸲 was built under R version 4.0.5
library(plotly)
package 㤼㸱plotly㤼㸲 was built under R version 4.0.5Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: 㤼㸱plotly㤼㸲
The following objects are masked from 㤼㸱package:plyr㤼㸲:
arrange, mutate, rename, summarise
The following object is masked from 㤼㸱package:ggplot2㤼㸲:
last_plot
The following object is masked from 㤼㸱package:stats㤼㸲:
filter
The following object is masked from 㤼㸱package:graphics㤼㸲:
layout
library(purrr)
library(furrr)
Loading required package: future
source("gg_ordisurf_viridis.R")
LOAD OBJECTS IF HAVE RUN ALREADY
geodist_pa<-readRDS(file.path(dataPath,"inputs/SPLITS/DEEP/hiDens_b1500_geodist_pa.rds"))
cannot open compressed file 'C:/Users/a21448/Havforskningsinstituttet/NiN + MAREANO - General/inputs/SPLITS/DEEP/hiDens_b1500_geodist_pa.rds', probable reason 'No such file or directory'Error in gzfile(file, "rb") : cannot open the connection
env_sort <- read.csv(file.path(dataPath,"inputs/SPLITS/DEEP/DeepBelow1500_env_sort_2022-09-30.csv")) %>% as.data.frame
otu_sort <- read.csv(file.path(dataPath,"inputs/SPLITS/DEEP/DeepBelow1500_otu_sort_2022-09-30.csv")) %>% as.data.frame
table(env_sort$SampID2 == otu_sort$SampID)
< table of extent 0 >
#must be same length and equal to unique sample number length - the ordered lists are assumed to be directly relatable on a row by row basis
dim(env_sort)
[1] 190 138
dim(otu_sort)
[1] 190 391
env_sort <- env_sort %>% select(-c("BO22_lightbotmean_bdmean",
"BO22_lightbotltmax_bdmean",
"BO22_lightbotltmin_bdmean",
"BO22_lightbotrange_bdmean"))
Error in app$vspace(new_style$`margin-top` %||% 0) :
attempt to apply non-function
env_sort_locations<- ggplot(data = env_sort,
aes(x = X.y,
y = Y)) +
theme_classic() +
geom_point(aes(colour = bathy),
size = 2) +
scale_colour_gradient2(low = "red",
mid = "yellow",
high = "green") +
ggtitle("Location of samples in sorted env file")
env_sort_locations
## Removing NAs
otuCompl <- otu_sort[complete.cases(env_sort[, -c(1:which(colnames(env_sort)=="bathy")-1)]), ]
envCompl <- env_sort[complete.cases(env_sort[, -c(1:which(colnames(env_sort)=="bathy")-1)]), ]
## Removing observations with less than 4 OTUs
sel <- rowSums(otuCompl[, -c(1:2)]) >= 4
otuSel <- otuCompl[sel, ]
envSel <- envCompl[sel, ]
## Removing phosphate
#envSel <- envSel %>% select(-phosphate_mean.tif)
dim(otuSel); dim(envSel)
[1] 161 391
[1] 161 134
env_sel_locations<- ggplot(data = envSel,
aes(x = X.y,
y = Y)) +
theme_classic() +
geom_point(aes(colour = bathy),
size = 2) +
scale_colour_gradient2(low = "red",
mid = "yellow",
high = "green") +
ggtitle("Location of samples in sorted env file")
env_sel_locations
envCompl_ccrem<-env_sort%>%filter(!SampID%in%envCompl$SampID)
envCompl_ccrem_locations<- ggplot(data = envCompl_ccrem,
aes(x = X.y,
y = Y)) +
theme_classic() +
geom_point(aes(colour = bathy),
size = 2) +
scale_colour_gradient2(low = "red",
mid = "yellow",
high = "green") +
ggtitle("Location of removed complete case samples resulting in envSel file")
envCompl_ccrem_locations
inv.sel <- rowSums(otuCompl[, -c(1:2)]) < 4
env.invSel <- envCompl[inv.sel, ]
env.invSel_locations<- ggplot(data = env.invSel,
aes(x = X.y,
y = Y)) +
theme_classic() +
geom_point(aes(colour = bathy),
size = 2) +
scale_colour_gradient2(low = "red",
mid = "yellow",
high = "green") +
ggtitle("Location of removed samples with <4 OTUs resulting in envSel file")
env.invSel_locations
# otu_red <- otu1[1:500, ]
# otu <- otu_red
#
# env_red <- env[1:500, ]
# env <- env_red
otu<-otuSel[,-c(1:which(colnames(otuSel)=="Acesta_excavata")-1)]
env<-envSel[, -c(2:(which(colnames(env_sort)=="bathy")-1))]
table(is.na(otu))
FALSE
62629
table(is.na(env))
FALSE
18837
#str(otuSel)
#str(envSel)
# ## Class 1
# otu1 <- subset(otuSel, envSel$SplitRev == 1)
# env1 <- envSel %>% filter(SplitRev == 1)
#
# ## Class 2
# otu2 <- subset(otuSel, envSel$SplitRev == 2)
# env2 <- envSel %>% filter(SplitRev == 2)
#
# ## Class 4
# otu3 <- subset(otuSel, envSel$SplitRev == 3)
# env3 <- envSel %>% filter(SplitRev == 3)
#
# ## Class 4
# otu4 <- subset(otuSel, envSel$SplitRev == 4)
# env4 <- envSel %>% filter(SplitRev == 4)
#
# ## Class 6
# otu6 <- subset(otuSel, envSel$SplitRev == 6)
# env6 <- envSel %>% filter(SplitRev == 6)
#
# ## Class 7
# otu7 <- subset(otuSel, envSel$SplitRev == 7)
# env7 <- envSel %>% filter(SplitRev == 7)
#
# ## Class 8
# otu8 <- subset(otuSel, envSel$SplitRev == 8)
# env8 <- envSel %>% filter(SplitRev == 8)
# ## Selecting
# otu <- otu2
# env <- env2
#env$coords.x1<-as.numeric(env$coords.x1)
#env$coords.x2<-as.numeric(env$coords.x2)
Make function You might have to drop variables that have been imported as character
otu_pa <- decostand(x = otu[, -c(1)],
method = "pa")
# y = ax^w # power transformation formula
dt <- otu[, -c(1:2)] # species data to transform
x_mn <- min(dt[dt > 0])
x_mx <- max(dt)
rng <- 6 # abundance range
w <- log(rng) / (log(x_mx) - log(x_mn))
a <- x_mn^(-w)
# otu_6 <- a * dt[, -c(1:3)]^w
otu_6 <- a * dt^w
range(otu_6)
[1] 0 6
Sys.time()
[1] "2022-10-03 12:14:10 CEST"
dca_pa <- decorana(veg = otu_pa)
some species were removed because they were missing in the data
print(dca_pa, head=T)
Call:
decorana(veg = otu_pa)
Detrended correspondence analysis with 26 segments.
Rescaling of axes with 4 iterations.
DCA1 DCA2 DCA3 DCA4
Eigenvalues 0.4412 0.3659 0.2158 0.3282
Decorana values 0.4811 0.4035 0.3339 0.3026
Axis lengths 4.3022 3.5983 3.3305 4.2530
## Bray-Curtis
dist_pa <- vegdist(x = otu_pa, method = "bray")
## Geodist
ep <- 0.8 # epsilon
geodist_pa <- isomapdist(dist = dist_pa, epsilon = ep)
saveRDS(geodist_pa,
file = (file.path(dataPath,"inputs/SPLITS/DEEP/hiDens_b1500_b1500_geodist_pa.rds")))
took 6.4mins with jonatan’s paralell solution (usually ~10mins)
Using old version without furrr
Sys.time()
[1] "2022-09-30 15:06:20 CEST"
d <- 2
mds_pa <- list()
for (i in 1:100) {
mds_pa[[i]]<-monoMDS(geodist_pa,
matrix(c(runif(dim(otu_pa)[1]*d)),
nrow = dim(otu_pa)[1]),
k = d,
model = "global",
maxit = 2000,
smin = 1e-7,
sfgrmin = 1e-7)
}
Sys.time()
[1] "2022-09-30 15:06:33 CEST"
# # monoMDS
# d <- 2
# mds_r6 <- list()
#
# Sys.time()
# for (i in 1:200) {
# mds_r6[[i]]<-monoMDS(geodist_r6,
# matrix(c(runif(dim(otu_6)[1]*d)),
# nrow = dim(otu_6)[1]),
# k = d,
# model = "global",
# maxit = 2000,
# smin = 1e-7,
# sfgrmin = 1e-7)# }
# Sys.time()
# #Jonatan's upgrade for speed - parallel processing of mds reptitions
# library(furrr) # Package to run non sequential functions in parallel
# #library(purrr)
# plan(multisession)
#
# d <- 2
#
# i <-200 # number of reps
#
# List_geodist_pa <- lapply(seq_len(i), function(X) geodist_pa) # makes a list with the input data repeated as many times as reps wanted
# start_t<-Sys.time()
# Xmds_pa<-furrr::future_map(List_geodist_pa,
# function(x) monoMDS(x,
# matrix(c(runif(dim(otu_6)[1]*d)),
# nrow = dim(otu_6)[1]),
# k = d,
# model = "global",
# maxit = 2000,
# smin = 1e-7,
# sfgrmin = 1e-7),
# .progress = TRUE)
# Sys.time() - start_t
can take a few mins
saveRDS(mds_pa,
file = file.path(dataPath,"inputs/SPLITS/DEEP/hiDens_b1500_mds_pa.rds"))
Make function?
# Loading geodist object
# geodist_nfi <- readRDS(file = "../SplitRev2_geodist_pa_full.rds")
# Loading mds results
# mds <- readRDS(file = "../SplitRev2_mds_pa_full.rds")
## Extracting the stress of each nmds iteration
mds_stress_pa<-unlist(lapply(mds_pa, function(v){v[[22]]}))
ordered_pa <-order(mds_stress_pa)
## Best, second best, and worst solution
mds_stress_pa[ordered_pa[1]]
[1] 0.2601212
mds_stress_pa[ordered_pa[2]]
[1] 0.2603306
mds_stress_pa[ordered_pa[10]]
[1] 0.2608846
## Scaling of axes to half change units and varimax rotation by postMDS
mds_best_pa<-postMDS(mds_pa[[ordered_pa[1]]],
geodist_pa,
pc = TRUE,
halfchange = TRUE,
threshold = ep) # Is this threshold related to the epsilon above?
mds_best_pa
Call:
monoMDS(dist = geodist_pa, y = matrix(c(runif(dim(otu_pa)[1] * d)), nrow = dim(otu_pa)[1]), k = d, model = "global", maxit = 2000, smin = 1e-07, sfgrmin = 1e-07)
Non-metric Multidimensional Scaling
161 points, dissimilarity ‘bray shortest isomap’, call ‘isomapdist(dist = dist_pa, epsilon = ep)’
Dimensions: 2
Stress: 0.2601212
Stress type 1, weak ties
Scores scaled to unit root mean square, rotated to principal components
Stopped after 104 iterations: Stress nearly unchanged (ratio > sratmax)
mds_secbest_pa <- postMDS(mds_pa[[ordered_pa[2]]],
geodist_pa,
pc = TRUE,
halfchange = TRUE,
threshold = ep)
mds_secbest_pa
Call:
monoMDS(dist = geodist_pa, y = matrix(c(runif(dim(otu_pa)[1] * d)), nrow = dim(otu_pa)[1]), k = d, model = "global", maxit = 2000, smin = 1e-07, sfgrmin = 1e-07)
Non-metric Multidimensional Scaling
161 points, dissimilarity ‘bray shortest isomap’, call ‘isomapdist(dist = dist_pa, epsilon = ep)’
Dimensions: 2
Stress: 0.2603306
Stress type 1, weak ties
Scores scaled to unit root mean square, rotated to principal components
Stopped after 248 iterations: Stress nearly unchanged (ratio > sratmax)
## Procrustes comparisons
procr_pa <- procrustes(mds_best_pa,
mds_secbest_pa,
permutations=999)
protest(mds_best_pa,
mds_secbest_pa,
permutations=999)
Call:
protest(X = mds_best_pa, Y = mds_secbest_pa, permutations = 999)
Procrustes Sum of Squares (m12 squared): 0.1237
Correlation in a symmetric Procrustes rotation: 0.9361
Significance: 0.001
Permutation: free
Number of permutations: 999
plot(procr_pa)
png(file=file.path(dataPath,"outputs/hiDens_b1500_procrustes_pa.png"), width=1000, height=700)
plot(procr_pa)
dev.off()
png
2
# Extracting ordination axis
ax <- 2
axis_pa <- cbind(mds_best_pa$points,
scores(dca_pa,
display = "sites",
origin = TRUE)[, 1:ax])
ggcorr(axis_pa,
method=c("everything","kendall"),
label = TRUE,
label_size = 3,
label_color = "black",
nbreaks = 8,
label_round = 3,
low = "red",
mid = "white",
high = "green")
NA
NA
ggsave(filename = file.path(dataPath,"outputs/hiDens_b1500_correlationPCAvsNMDS_PA.png"),
device = "png",
dpi=300 )
Saving 7 x 7 in image
# Switching direction of NMDS1
# mds_best$points[, 1] <- -mds_best$points[, 1]
dca_r6 <- decorana(veg = otu_6)
some species were removed because they were missing in the data
print(dca_r6, head=T)
Call:
decorana(veg = otu_6)
Detrended correspondence analysis with 26 segments.
Rescaling of axes with 4 iterations.
DCA1 DCA2 DCA3 DCA4
Eigenvalues 0.4624 0.3966 0.3092 0.3106
Decorana values 0.5075 0.4233 0.3731 0.2867
Axis lengths 4.3845 4.1435 3.1853 3.4650
## Bray-Curtis
dist_r6 <- vegdist(x = otu_6, method = "bray")
## Geodist
ep <- 0.80 # epsilon
geodist_r6 <- isomapdist(dist = dist_r6, epsilon = ep)
saveRDS(geodist_r6,
file = (file.path(dataPath,"inputs/SPLITS/DEEP/hiDens_b1500_geodist_r6.rds")))
(Jonatan’s parallel solution took 5.4mins, before it took 10mins)
Using older version without furrr.
## monoMDS
d <- 2
mds_r6 <- list()
Sys.time()
[1] "2022-09-30 15:07:13 CEST"
for (i in 1:200) {
mds_r6[[i]]<-monoMDS(geodist_r6,
matrix(c(runif(dim(otu_6)[1]*d)),
nrow = dim(otu_6)[1]),
k = d,
model = "global",
maxit = 2000,
smin = 1e-7,
sfgrmin = 1e-7)
}
Sys.time()
[1] "2022-09-30 15:07:28 CEST"
# #Jonatan's upgrade for speed - parallel processing of mds reptitions
# #library(furrr) # Package to run non sequential functions in parallel
# #library(purrr)
# plan(multisession)
#
# d <- 2
#
# i <-200 # number of reps
#
# List_geodist_r6 <- lapply(seq_len(i), function(X) geodist_r6) # makes a list with the input data repeated as many times as reps wanted
# start_t<-Sys.time()
# Xmds_r6<-furrr::future_map(List_geodist_r6,
# function(x) monoMDS(x,
# matrix(c(runif(dim(otu_6)[1]*d)),
# nrow = dim(otu_6)[1]),
# k = d,
# model = "global",
# maxit = 2000,
# smin = 1e-7,
# sfgrmin = 1e-7),
# .progress = TRUE)
# Sys.time() - start_t
saveRDS(mds_r6,
file = file.path(dataPath,"inputs/SPLITS/DEEP/hiDens_b1500_mds_r6.rds"))
# Loading geodist object
# geodist_nfi <- readRDS(file = "../SplitRev2_geodist_nfi.rds")
# Loading mds results
# mds <- readRDS(file = "../SplitRev2_mds.rds")
## Extracting the stress of each nmds iteration
mds_stress_r6<-unlist(lapply(mds_r6, function(v){v[[22]]}))
ordered_r6 <-order(mds_stress_r6)
## Best, second best, and worst solution
mds_stress_r6[ordered_r6[1]]
[1] 0.2524937
mds_stress_r6[ordered_r6[2]]
[1] 0.2525011
mds_stress_r6[ordered_r6[100]]
[1] 0.2581464
## Scaling of axes to half change units and varimax rotation by postMDS
mds_best_r6<-postMDS(mds_r6[[ordered_r6[1]]],
geodist_r6,
pc = TRUE,
halfchange = TRUE,
threshold = ep) # Is this threshold related to the epsilon above?
mds_best_r6
Call:
monoMDS(dist = geodist_r6, y = matrix(c(runif(dim(otu_6)[1] * d)), nrow = dim(otu_6)[1]), k = d, model = "global", maxit = 2000, smin = 1e-07, sfgrmin = 1e-07)
Non-metric Multidimensional Scaling
161 points, dissimilarity ‘bray shortest isomap’, call ‘isomapdist(dist = dist_r6, epsilon = ep)’
Dimensions: 2
Stress: 0.2524937
Stress type 1, weak ties
Scores scaled to unit root mean square, rotated to principal components
Stopped after 70 iterations: Stress nearly unchanged (ratio > sratmax)
mds_secbest_r6<-postMDS(mds_r6[[ordered_r6[2]]],
geodist_r6,
pc = TRUE,
halfchange = TRUE,
threshold = ep)
mds_secbest_r6
Call:
monoMDS(dist = geodist_r6, y = matrix(c(runif(dim(otu_6)[1] * d)), nrow = dim(otu_6)[1]), k = d, model = "global", maxit = 2000, smin = 1e-07, sfgrmin = 1e-07)
Non-metric Multidimensional Scaling
161 points, dissimilarity ‘bray shortest isomap’, call ‘isomapdist(dist = dist_r6, epsilon = ep)’
Dimensions: 2
Stress: 0.2525011
Stress type 1, weak ties
Scores scaled to unit root mean square, rotated to principal components
Stopped after 118 iterations: Stress nearly unchanged (ratio > sratmax)
## Procrustes comparisons
procr_r6 <- procrustes(mds_best_r6,
mds_secbest_r6,
permutations=999)
protest(mds_best_r6,
mds_secbest_r6,
permutations=999)
Call:
protest(X = mds_best_r6, Y = mds_secbest_r6, permutations = 999)
Procrustes Sum of Squares (m12 squared): 0.000444
Correlation in a symmetric Procrustes rotation: 0.9998
Significance: 0.001
Permutation: free
Number of permutations: 999
plot(procr_r6)
png(file.path(dataPath,"outputs/hiDens_b1500_procrustes_r6.png"), width=1000, height=700,) #added 1000
plot(procr_r6)
dev.off()
png
2
#### 1000 reps - don’t run if loading saved object Currently commented out as it gained nothing but took extra time. Can be removed in due course.
retain the 200 rep version
# Extracting ordination axis
ax <- 2
axis_r6 <- cbind(mds_best_r6$points,
scores(dca_r6,
display = "sites",
origin = TRUE)[, 1:ax])
ggcorr(axis_r6,
method=c("everything","kendall"),
label = TRUE,
label_size = 3,
label_color = "black",
nbreaks = 8,
label_round = 3,
low = "red",
mid = "white",
high = "green")
ggsave(filename = file.path(dataPath,"outputs/hiDens_b1500_correlationPCAvsNMDS_r6.png"),
device = "png",
dpi=300 )
Saving 7 x 7 in image
# Switching direction of NMDS1
# mds_best$points[, 1] <- -mds_best$points[, 1]
## Adding scores to data frame
otu_6$gnmds1 <- mds_best_r6$points[, 1]
otu_6$gnmds2 <- mds_best_r6$points[, 2]
otu_6$dca1 <- scores(dca_r6, display = "sites", origin = TRUE)[, 1]
otu_6$dca2 <- scores(dca_r6, display = "sites", origin = TRUE)[, 2]
p_gnmds_r6 <- ggplot(data = otu_6,
aes(x = gnmds1,
y = gnmds2)) +
theme_classic() +
coord_fixed() +
ggtitle("GNMDS",
subtitle = "First run") +
geom_point(colour = "red") +
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")
p_dca_r6 <- ggplot(data = otu_6,
aes(x = dca1,
y = dca2)) +
theme_classic() +
coord_fixed() +
ggtitle("DCA",
subtitle = "First run") +
geom_point(colour = "red") +
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")
p_gnmds_r6 + p_dca_r6
NA
NA
ggsave(file.path(dataPath,"outputs/hiDens_b1500_gnmds_dca_r6.png"),
device = "png",
dpi=300)
Saving 7 x 7 in image
ord <- mds_best_r6
## Axis scores if selected ord is GNMDS
axis <- ord$points %>% as.data.frame
## Axis scores if selected ord is DCA
# axis <- scores(ord,
# display = "sites",
# origin = TRUE)[, 1:ax])
decided to make MLD-bathy vars
env<-env %>%
mutate ("MLDmean_bathy"=MLDmean_Robinson-(bathy*-1),
"MLDmin_bathy"=MLDmin_Robinson-(bathy*-1),
"MLDmax_bathy"=MLDmax_Robinson-(bathy*-1))
env$MLDmean_bathy<-cut(env$MLDmean_bathy,
breaks=c(-2560, -20,20,130),#checked range of values first (min -2554, max 123)
labels=c('belowMLD','onPycno','inMixLayer'))
env$MLDmin_bathy<-cut(env$MLDmin_bathy,
breaks=c(-2560, -20,20,130),#checked range of values first (min -2554, max 123)
labels=c('belowMLD','onPycno','inMixLayer'))
env$MLDmax_bathy<-cut(env$MLDmax_bathy,
breaks=c(-2560, -20,20,130),#checked range of values first (min -2554, max 123)
labels=c('belowMLD','onPycno','inMixLayer'))
env$swDensRob_avs<-swRho(salinity=env$Smean_Robinson,
temperature=env$Tmean_Robinson,
pressure=(env$bathy*-1),
eos="unesco")
env_cont<-env%>% select(-c(landscape,sedclass,gmorph, MLDmean_bathy, MLDmax_bathy, MLDmin_bathy, #categorical
optional, #not a var
MLDmax_Robinson, MLDmean_Robinson, MLDmin_Robinson, #replaced by new vars
MLDsd_Robinson #not meaningful
))
env_cont<-env_cont%>% mutate_if(is.integer,as.numeric)
env_corr <- env_cont #%>% select(-c(SampID))
# env_corr$coords.x1<-as.numeric(env_corr$coords.x1)
# env_corr$coords.x2<-as.numeric(env_corr$coords.x2)
env_corr[(!is.numeric(env_corr)),]
# Vector to hold correlations
cor_ax1 <- NULL
cor_ax2 <- NULL
pv_ax1 <- NULL
pv_ax2 <- NULL
# NMDS1
for( i in seq(length(env_corr))) {
ct.i <- cor.test(axis$MDS1,
env_corr[, i],
method = "kendall")
cor_ax1[i] <- ct.i$estimate
pv_ax1[i] <- ct.i$p.value
}
the standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zero
# NMDS2
for( i in seq(length(env_corr))) {
ct.i <- cor.test(axis$MDS2,
env_corr[, i],
method = "kendall")
cor_ax2[i] <- ct.i$estimate
pv_ax2[i] <- ct.i$p.value
}
the standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zero
cor_tab <- data.frame(env = names(env_corr),
ord_ax1 = cor_ax1,
pval_ax1 = pv_ax1,
ord_ax2 = cor_ax2,
pval_ax2 = pv_ax2)
cor_tab
write.csv(x = cor_tab,
file = file.path(dataPath,"inputs/SPLITS/DEEP/hiDens_b1500_cor-table_r6_200rep_MLD-bathy.csv"),
row.names = FALSE)
cor_a1_sort<-cor_tab%>%
There were 15 warnings (use warnings() to see them)
mutate(abs_ord_ax1=abs(ord_ax1),
abs_ord_ax2=abs(ord_ax2)) %>%
arrange(desc(abs_ord_ax1))
cor_a2_sort<-cor_tab%>%
mutate(abs_ord_ax1=abs(ord_ax1),
abs_ord_ax2=abs(ord_ax2)) %>%
arrange(desc(abs_ord_ax2))
dotchart(cor_a1_sort$abs_ord_ax1, main="Absolute (+/-) correlations between envVars and gnmds axis 1")
cor_cut<-0.50 #decide
cor_sel<-subset(cor_a1_sort,abs_ord_ax1>cor_cut)
cor_sel
as.data.frame(cor_sel$env)
env_os <- env[, cor_sel$env]
env_os
str(env_os)
'data.frame': 161 obs. of 11 variables:
$ swDensRob_avs : num 1037 1037 1036 1036 1035 ...
$ bathy : num -1961 -2009 -1598 -1596 -1594 ...
$ Tmean_Robinson : num -0.246 -0.251 0.45 0.528 0.607 ...
$ Smax_Robinson : num 35 34.9 35 35 35 ...
$ Tmax_Robinson : num -0.234 -0.234 0.498 0.579 0.66 ...
$ Smean_Robinson : num 34.9 34.9 35 35 35 ...
$ Tmin_Robinson : num -0.271 -0.299 0.382 0.458 0.534 ...
$ Smin_Robinson : num 34.9 34.9 35 35 35 ...
$ BO22_carbonphytoltmin_bdmean: num 0.0199 0.0199 0.0197 0.0197 0.0197 ...
$ BO22_ironmean_bdmean : num 0.000684 0.000687 0.000674 0.000673 0.000673 ...
$ BO22_ironrange_bdmean : num 0.000684 0.000687 0.000674 0.000673 0.000673 ...
ordsrfs <- list(length = ncol(env_os))
for (i in seq(ncol(env_os))) {
os.i <- gg_ordisurf(ord = ord,
env.var = env_os[, i],
pt.size = 1,
# binwidth = 0.05,
var.label = names(env_os)[i],
gen.text.size = 10,
title.text.size = 15,
leg.text.size = 10
)
ordsrfs[[i]] <- os.i$plot
}
ordsrfs_plt <- ggarrange(plotlist = ordsrfs,
nrow = 4,
ncol = 3)
ordsrfs_plt
ggexport(ordsrfs_plt,
filename = file.path(dataPath,"outputs/hiDens_b1500_ordisurfs_top_corr.png"),
width = 1000,
height = 1000)
env_os_m <- env[,c("Tmean_Robinson", #top corr
"salt_max", #top corr
"Smax_Robinson", #comparison to top corr
"swDensRob_avs", #top corr
"BO22_icecoverltmax_ss",#top corr ax2
"BO22_icecovermean_ss",#top corr
"BO22_dissoxmean_bdmean",#top corr
#"BO22_carbonphytoltmin_bdmean",#top corr - no clear gradient in ordisurf
"BO22_ppltmin_ss", #top corr
"X.y", #comparison to Y
"Y", #top corr
"spd_std", #top corr ax2 (blended model)
"CSpdsd_Robinson", #comparison to top corr ax2 (blended model)
"mud", #highest sed var ax1 + corr
"gravel",#highest sed var ax1 - corr
"BO22_silicateltmax_bdmean", #just under top corr ax1
"bathy", #intuitive for comparisons
"slope9"#intuitive for comparisons
)]
env_os_m
str(env_os_m)
'data.frame': 161 obs. of 17 variables:
$ Tmean_Robinson : num -0.246 -0.251 0.45 0.528 0.607 ...
$ salt_max : num 34.9 34.9 35 35 35 ...
$ Smax_Robinson : num 35 34.9 35 35 35 ...
$ swDensRob_avs : num 1037 1037 1036 1036 1035 ...
$ BO22_icecoverltmax_ss : num 9.78e-05 9.71e-05 0.00 0.00 0.00 ...
$ BO22_icecovermean_ss : num 1.22e-05 1.25e-05 0.00 0.00 0.00 ...
$ BO22_dissoxmean_bdmean : num 304 304 293 293 293 ...
$ BO22_ppltmin_ss : num 0.00 0.00 1.49e-05 1.49e-05 1.49e-05 ...
$ X.y : num 463061 470861 554861 555061 555261 ...
$ Y : num 8311934 8295534 7744134 7744134 7744134 ...
$ spd_std : num 0.0511 0.0508 0.0726 0.0736 0.0748 ...
$ CSpdsd_Robinson : num 0.014566 0.01478 0.000513 0.000492 0.00047 ...
$ mud : num 96 96 69.5 69.5 69.5 0 0 80 0 0 ...
$ gravel : num 0 0 0.5 0.5 0.5 0 0 0 0 0 ...
$ BO22_silicateltmax_bdmean: num 13.2 13.4 10.8 10.8 10.8 ...
$ bathy : num -1961 -2009 -1598 -1596 -1594 ...
$ slope9 : num 0.814 0.672 3.516 3.255 3.39 ...
ordsrfs_m <- list(length = ncol(env_os_m))
for (i in seq(ncol(env_os_m))) {
os.i_m <- gg_ordisurf(ord = ord,
env.var = env_os_m[, i],
pt.size = 1,
# binwidth = 0.05,
var.label = names(env_os_m)[i],
gen.text.size = 10,
title.text.size = 15,
leg.text.size = 10)
ordsrfs_m[[i]] <- os.i_m$plot
}
ordsrfs_plt_m <- ggarrange(plotlist = ordsrfs_m,
nrow = 6,
ncol = 3)
ordsrfs_plt_m
ggexport(ordsrfs_plt_m,
filename = file.path(dataPath,"outputs/hiDens_b1500_ordisurfs_man_sel_domean.png"),
width = 2000,
height = 2000)
## Select if any var should be excluded from envfit (makes less busy to read)
env_os_m_envfit<-env_os_m [,c("Tmean_Robinson", #top corr
"salt_max", #top corr
"Smax_Robinson", #comparison to top corr
"swDensRob_avs", #top corr
"BO22_icecoverltmax_ss",#top corr ax2
#"BO22_icecovermean_ss",#top corr
"BO22_dissoxmean_bdmean",#top corr
#"BO22_dissoxltmin_bdmean",#top corr
#"BO22_carbonphytoltmin_bdmean",#top corr - no clear gradient in ordisurf
"BO22_ppltmin_ss", #top corr
"X.y", #comparison to Y
"Y", #top corr
"spd_std", #top corr ax2 (blended model)
# "CSpdsd_Robinson", #comparison to top corr ax2 (blended model)
"mud", #highest sed var ax1 + corr
"gravel",#highest sed var ax1 - corr
"BO22_silicateltmax_bdmean", #just under top corr ax1
"bathy" #intuitive for comparisons
)]
colnames(env_os_m_envfit)<-c("T", #top corr
"sMx", #top corr
"SmaxR", #comparison to top corr
"swDensR", #top corr
"icecovmax",#top corr ax2
#"icecovav",#top corr
"dissoxav",#top corr
#"dissoxmin",#top corr
#"BO22_carbonphytoltmin_bdmean",#top corr - no clear gradient in ordisurf
"ppltmin", #top corr
"X", #comparison to Y
"Y", #top corr
"spd_std", #top corr ax2 (blended model)
# "CSsd", #comparison to top corr ax2 (blended model)
"mud", #highest sed var ax1 + corr
"gravel",#highest sed var ax1 - corr
"SiLtmax", #just under top corr ax1
"bathy" #intuitive for comparisons
)
## Envfot plot
gg_envfit(ord = ord,
env = env_os_m_envfit,
pt.size = 1)
## Envfit analysis
ef <- envfit(ord = ord,
env = env_os_m_envfit,
# na.rm = TRUE
)
efDF <- as.data.frame(scores(ef,
display = "vectors"))
ggsave(filename = file.path(dataPath,"outputs/hiDens_b1500_EnvFit_man_sel_cln_domean.png"),
device = "png",
dpi=300 )
Saving 7 x 7 in image
env_vis<-env
env_vis$gnmds1 <- otu_6$gnmds1
env_vis$gnmds2 <- otu_6$gnmds2
env_vis$dca1 <- otu_6$dca1
env_vis$dca2 <- otu_6$dca2
env_vis$SampID <- envSel$SampID
dca_int <- ggplot(data = env_vis,
aes(x = dca1,
y = dca2)) +
theme_classic() +
coord_fixed() +
ggtitle("Interactive DCA sample ID plot",
subtitle = "First run") +
geom_point(aes(colour = factor(SampID))) +
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")
ggplotly(dca_int)
p_mld <- ggplot(data = env_vis,
aes(x = gnmds1,
y = gnmds2)) +
theme_classic() +
coord_fixed() +
ggtitle("GNMDS coloured by proximity to mixed layer depth",
subtitle = "First run") +
geom_point(aes(colour = MLDmean_bathy)) +
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")
p_mld
env_vis<- env_vis %>%
mutate(
sedclassName = case_when(
sedclass == "1" ~ "SedCoverR",
sedclass == "5" ~ "Rock",
sedclass == "20" ~ "Mud",
sedclass == "21" ~ "MwBlock",
sedclass == "40" ~ "sMud",
sedclass == "80" ~ "mSand",
sedclass == "100" ~ "Sand",
sedclass == "110" ~ "gMud",
sedclass == "115" ~ "gsMud",
sedclass == "120" ~ "gmSand",
sedclass == "130" ~ "gSand",
sedclass == "150" ~ "MSG",
sedclass == "160" ~ "sGravel",
sedclass == "170" ~ "Gravel",
sedclass == "175" ~ "GravBlock",
sedclass == "185" ~ "SGBmix",
sedclass == "205" ~ "S/MwB",
sedclass == "206" ~ "S/MwG/B",
sedclass == "215" ~ "SGBalt",
sedclass == "300" ~ "HardSed",
sedclass == "500" ~ "Biogenic"
)
)
c25 <- c(
"dodgerblue2", "#E31A1C", # red
"green4",
"#6A3D9A", # purple
"#FF7F00", # orange
"black", "gold1",
"skyblue2", "#FB9A99", # lt pink
"palegreen2",
"#CAB2D6", # lt purple
"#FDBF6F", # lt orange
"gray70", "khaki2",
"maroon", "orchid1", "hiDens_b1500pink1", "blue1", "steelblue4",
"darkturquoise", "green1", "yellow4", "yellow3",
"darkorange4", "brown"
)
p_sed <- ggplot(data = env_vis,
aes(x = gnmds1,
y = gnmds2)) +
theme_classic() +
coord_fixed() +
ggtitle("GNMDS coloured by sediment class",
subtitle = "First run") +
geom_point(aes(colour = factor(sedclassName))) +
scale_colour_manual(values=c25)+
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")+
guides(colour=guide_legend(ncol=2))
p_sed
env_vis<- env_vis %>%
mutate(
landscapeName = case_when(
landscape == "1" ~ "Strandflat",
landscape == "21" ~ "ContSlope",
landscape == "22" ~ "Canyon",
landscape == "31" ~ "Valley",
landscape == "32" ~ "Fjord",
landscape == "41" ~ "DeepSeaPlane",
landscape == "42" ~ "SlopePlain",
landscape == "43" ~ "ShelfPlain",
landscape == "431" ~ "shallowValley"
)
)
p_land <- ggplot(data = env_vis,
aes(x = gnmds1,
y = gnmds2)) +
theme_classic() +
coord_fixed() +
ggtitle("GNMDS coloured by landscape class",
subtitle = "First run") +
geom_point(aes(colour = factor(landscapeName))) +
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")+
guides(colour=guide_legend(ncol=2))
p_land
p_gmo <- ggplot(data = env_vis,
aes(x = gnmds1,
y = gnmds2)) +
theme_classic() +
coord_fixed() +
ggtitle("GNMDS coloured by landscape class",
subtitle = "First run") +
geom_point(aes(colour = factor(gmorph))) +
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")+
guides(colour=guide_legend(ncol=2))
p_gmo
cat_var_plots<-p_mld+p_sed+p_land+p_gmo
ggexport(cat_var_plots,
filename = file.path(dataPath,"outputs/hiDens_b1500_gnmds_catvar.png"),
width = 1000,
height = 800)
p_gmo <- ggplot(data = env_vis,
aes(x = gnmds1,
y = gnmds2)) +
theme_classic() +
coord_fixed() +
ggtitle("GNMDS coloured by sample",
subtitle = "First run") +
geom_point(aes(colour = factor(SampID))) +
geom_vline(xintercept = 0,
linetype = 2,
colour = "lightgray") +
geom_hline(yintercept = 0,
linetype = 2,
colour = "lightgray")+
guides(colour="none", size="none")
ggplotly(p_gmo)